/* Perform Oaxaca-Blinder-style decomposition on the MSE for humans and econometric inference.
*/
clear
version 13.1
set more off
set scheme  s1mono 
		
cd "${main}"

cap log close
log using "${logs}/experts_vs_estimators_mse_decomposition2.log", replace


***************
* MSE, Bias^2 *
***************
use "${dat}/expert_reported_discontinuities_cleaned.dta", clear
split gid, p("_")
gen file = gid1 + "_" + gid2 + "_" + gid7
merge m:1 file using "${dat}/rd_estimates_micro.dta"
keep if _merge == 3


* Compute rejection decisions
gen rd_c_ik_reject = rd_c_cl_ik > 0 | rd_c_cu_ik < 0
* When estimator doesn't reject null, guess 0
replace rd_c_ik = rd_c_ik * rd_c_ik_reject

* Compute squared error for each estimator
foreach est in playerstated_disc_size rd_c_ik {
	
	replace `est' = round(`est', 0.01)
	gen `est'_t = `est' // Trimmed version
	
	* Trim
	foreach dgp in "ADGP1" "BDGP2" "CDGP3" "DDGP4" "EDGP5" "FDGP6" "GDGP7" "HDGP8" "IDGP9" "JDGP10" "KDGP11" {
		gen tosort1 = playerdgp == "`dgp'"
		gen tosort2 = abs(`est' - discont_raw)
		sort tosort1 tosort2
		replace `est'_t = . if _N - _n < 10
		drop tosort*
	} 
	
	sort playerdgp playerdisc
	* DGPxDiscontinuity average guess for the method
	by playerdgp playerdisc: egen d`est' = mean(`est')
	by playerdgp playerdisc: egen d`est'_t = mean(`est'_t)
	* Bias^2
	gen a`est' = (d`est' - discont_raw)^2
	gen a`est'_t = (d`est'_t - discont_raw)^2
	
	replace `est' = (`est' - discont_raw)^2
	replace `est'_t = (`est'_t - discont_raw)^2
	
}

* Compute averages
collapse (mean) playerstated_disc_size rd_c_ik aplayerstated_disc_size ard_c_ik ///
	playerstated_disc_size_t rd_c_ik_t aplayerstated_disc_size_t ard_c_ik_t, by(playerdgp)

* Go wide to long
* Rename estimators to numbers to allow reshaping
rename playerstated_disc_size mse_h
rename rd_c_ik mse_q
rename aplayerstated_disc_size bias2_h
rename ard_c_ik bias2_q
rename playerstated_disc_size_t mse_h_t
rename rd_c_ik_t mse_q_t
rename aplayerstated_disc_size_t bias2_h_t
rename ard_c_ik_t bias2_q_t

* Compute terms of the decomposition
gen r_h = (mse_h - bias2_h) / bias2_h
gen r_q = (mse_q - bias2_q) / bias2_q
gen mse_diff = mse_h - mse_q
gen decomp1 = bias2_h * (1 + r_q)
gen decomp2 = - bias2_q * (1 + r_q)
gen decomp3 = bias2_h * (r_h - r_q)
gen r_h_t = (mse_h_t - bias2_h_t) / bias2_h_t
gen r_q_t = (mse_q_t - bias2_q_t) / bias2_q_t
gen mse_diff_t = mse_h_t - mse_q_t
gen decomp1_t = bias2_h_t * (1 + r_q_t)
gen decomp2_t = - bias2_q_t * (1 + r_q_t)
gen decomp3_t = bias2_h_t * (r_h_t - r_q_t)
gen bias_diff = bias2_h - bias2_q
gen var_diff = (mse_h - bias2_h) - (mse_q - bias2_q)
gen bias_diff_t = bias2_h_t - bias2_q_t
gen var_diff_t = (mse_h_t - bias2_h_t) - (mse_q_t - bias2_q_t)

label var mse_h "MSE (Expert)"
label var mse_q "MSE (IK)"
label var mse_diff "M_E-M_IK"
label var mse_diff_t "M_E-E_IK"
label var decomp1 "B_E(1+r_IK)"
label var decomp2 "B_IK(1+r_IK)"
label var decomp3 "B_E(r_E-r_IK)"
label var decomp1_t "B_E(1+r_IK)"
label var decomp2_t "B_IK(1+r_IK)"
label var decomp3_t "B_E(r_E-r_IK)"
label var bias_diff "B_E-B_IK"
label var var_diff "V_E-V_IK"
label var bias_diff_t "B_E-B_IK"
label var var_diff_t "V_E-V_IK"


*By player DGP:
*DGP specific graphs:
levelsof playerdgp, local(levels) 
local count = 1
foreach l of local levels{
preserve
keep if playerdgp == "`l'"
	onewayplot mse_diff decomp1 decomp2 decomp3, ///
	variablelabels ///
	xtitle("Value") ///
	title("DGP `count'") ///
	name(graph`count', replace)

local count = `count' + 1
restore
}
graph combine graph1 graph2 graph3 graph4 graph5 graph6 graph7 graph8 graph9 graph10 graph11
graph export "${output}/experts_vs_estimators_mse_decomposition2.pdf", as(pdf) replace


expand 2
expand 2
drop if _n > 33
gen y = 0
gen x = bias_diff
replace x = var_diff if _n > 11
replace x = mse_diff if _n > 22
gen xlab = "B"
replace xlab = "V" if _n > 11
replace xlab = "M" if _n > 22
keep playerdgp x y xlab
	 
*By player DGP:
*DGP specific graphs:
levelsof playerdgp, local(levels) 
local count = 1
foreach l of local levels{
preserve
keep if playerdgp == "`l'"
graph twoway (scatter y x if xlab == "B", msymbol(none) mlabel(xlab) mlabp(0) mlabsize(huge)) ///
	(scatter y x if xlab == "V", msymbol(none) mlabel(xlab) mlabp(0) mlabsize(huge)) ///
	(scatter y x if xlab == "M", msymbol(none) mlabel(xlab) mlabp(0) mlabsize(huge)), ///
	ylabel(, nolabels) ytitle("") xtitle("Value") ///
	 legend(label(1  "B: Bias{c 178}{sub:Expert} - Bias{c 178}{sub:IK}") label(2 "V: Var{sub:Expert} - Var{sub:IK}") label(3 "M: MSE{sub:Expert} - MSE{sub:IK}")) ///
	title("DGP `count'") ///
	name(graph`count', replace)

local count = `count' + 1
restore
}
grc1leg graph1 graph2 graph3 graph4 graph5 graph6 graph7 graph8 graph9 graph10 graph11, legendfrom(`1')
graph export "${output}/experts_vs_estimators_mse_decomposition_alt2.pdf", as(pdf) replace

graph close _all

log close
